<?php
/**
 * crawl_airport.php
 * Crawl configuration for the feeyo.com airport-code listing pages.
 *
 * Author:  胡毅 (huyi@comsenz.com)
 * Created: 2012-10-13 16:24:14
 * Change log:
 *
 * $id$
 */

// Crawl definition for the feeyo.com airport-code listing.
//
// Every extraction rule below is an ordered list of steps. Each step is a
// one-entry map whose key names the operation ('Find', 'Match', 'Replace')
// and whose value is that operation's argument list.
// NOTE(review): the 'Find' arguments look like (selector, index, property)
// in the style of a DOM-query library -- confirm against the crawler that
// consumes this array.
$conf['feeyo']['airpode_codes'] = [
    // Site-wide settings
    'conf' => [
        'charset' => 'gbk',
        'webName' => '飞友网',
        // Retry count (required)
        'retry' => 0,
    ],

    // Crawl settings
    'crawl_conf' => [
        // Listing URL to crawl (required). The {#page} placeholder is
        // presumably filled from the urlPage range below -- TODO confirm.
        'url' => 'http://www.feeyo.com/airport_code.asp?page={#page}',
        'urlPage' => ['start' => 1, 'end' => 57],

        // HTTP method (required)
        'method' => 'get',

        // Optional. For POST these are the request parameters; for GET they
        // are parameters appended to the request.
        'postData' => [],

        // Extraction rules (required)
        'crawlInfo' => [
            // Preliminary lookup: steps that narrow the page down before the
            // per-field rules are applied.
            'pre' => [
                ['Find' => ['[class=tab_hand f12s]', 0, 'innertext']],
                ['Find' => ['tr', 'all', 'outertext']],
                ['Match' => [false, '/<tr onmouseover[\s\S]+<td colspan/', 'all']],
                ['Replace' => ['/<tr height=30>|<td colspan/', '']],
                ['Find' => ['tr', 'all-a', 'innertext']],
            ],

            // Per-field rules: one 'Find' step each.
            'acCityName' => [
                ['Find' => ['a', 0, 'plaintext']],
            ],
            'threeCode' => [
                ['Find' => ['td', 1, 'plaintext']],
            ],
            'fourCode' => [
                ['Find' => ['td', 2, 'plaintext']],
            ],
            'acAirpotName' => [
                ['Find' => ['td', 3, 'plaintext']],
            ],
            'acCityPinyin' => [
                ['Find' => ['td', 4, 'plaintext']],
            ],
        ],
    ],
];